{
/* NB. There are 512 8-byte entries per GDT page. */
unsigned int i, j, nr_pages = (entries + 511) / 512;
- unsigned long pfn, *gdt_page, flags;
+ unsigned long pfn, *gdt_page;
long ret = -EINVAL;
struct pfn_info *page;
struct desc_struct *vgdt;
- spin_lock_irqsave(&p->page_lock, flags);
+ spin_lock(&p->page_lock);
/* Check the new GDT. */
for ( i = 0; i < nr_pages; i++ )
ret = 0; /* success */
out:
- spin_unlock_irqrestore(&p->page_lock, flags);
+ spin_unlock(&p->page_lock);
return ret;
}
long do_update_descriptor(
unsigned long pa, unsigned long word1, unsigned long word2)
{
- unsigned long *gdt_pent, flags, pfn = pa >> PAGE_SHIFT;
+ unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT;
struct pfn_info *page;
long ret = -EINVAL;
if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(word1, word2) )
return -EINVAL;
- spin_lock_irqsave(¤t->page_lock, flags);
+ spin_lock(¤t->page_lock);
page = frame_table + pfn;
if ( (page->flags & PG_domain_mask) != current->domain )
ret = 0; /* success */
out:
- spin_unlock_irqrestore(¤t->page_lock, flags);
+ spin_unlock(¤t->page_lock);
return ret;
}
#include <xeno/sched.h>
#include <xeno/errno.h>
#include <xeno/perfc.h>
+#include <xeno/interrupt.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/io.h>
{
struct task_struct *p = current;
unsigned long addr = p->mm.ldt_base + (off << PAGE_SHIFT);
- unsigned long l1e, *ldt_page, flags;
+ unsigned long l1e, *ldt_page;
struct pfn_info *page;
int i, ret = -1;
- spin_lock_irqsave(&p->page_lock, flags);
+ /* We cannot take a page_lock in interrupt context. */
+ if ( in_interrupt() )
+ BUG();
+
+ spin_lock(&p->page_lock);
__get_user(l1e, (unsigned long *)(linear_pg_table+(addr>>PAGE_SHIFT)));
if ( unlikely(!(l1e & _PAGE_PRESENT)) )
ret = 0;
out:
- spin_unlock_irqrestore(&p->page_lock, flags);
+ spin_unlock(&p->page_lock);
return ret;
}
err = 1;
- spin_lock_irq(&current->page_lock);
+ spin_lock(&current->page_lock);
/* Get the page-frame number that a non-extended command references. */
if ( (cmd == MMU_NORMAL_PT_UPDATE) ||
}
unlock:
- spin_unlock_irq(&current->page_lock);
+ spin_unlock(&current->page_lock);
if ( unlikely(err) )
{
if ( unlikely(page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT)) )
goto out;
- spin_lock_irq(&p->page_lock);
+ spin_lock(&p->page_lock);
/* Check that the VA's page-directory entry is present.. */
if ( unlikely((err = __get_user(_x, (unsigned long *)
if ( unlikely(cr0 != 0) )
write_cr0(cr0);
unlock_and_out:
- spin_unlock_irq(&p->page_lock);
+ spin_unlock(&p->page_lock);
out:
return err;
}
static kmem_cache_t *buffer_head_cachep;
static atomic_t nr_pending;
+static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned;
+
static int __buffer_is_valid(struct task_struct *p,
unsigned long buffer,
unsigned short size,
/******************************************************************
* COMPLETION CALLBACK -- Called as bh->b_end_io()
- * NB. This can be called from interrupt context!
*/
+/*
+ * Softirq handler: drain this CPU's list of completed buffer_heads and
+ * deliver the corresponding responses to the guest domains.
+ *
+ * end_block_io_op() (which may run in hardirq context) only chains
+ * finished bhs onto completed_bhs[cpu]; all response work is deferred
+ * to here, so the locks taken below need not be irq-safe.
+ */
+static void end_block_io_op_softirq(struct softirq_action *h)
+{
+ pending_req_t *pending_req;
+ struct buffer_head *bh, *nbh;
+ unsigned int cpu = smp_processor_id();
+
+ /* Detach the per-CPU list atomically w.r.t. the hardirq enqueuer. */
+ local_irq_disable();
+ bh = completed_bhs[cpu];
+ completed_bhs[cpu] = NULL;
+ local_irq_enable();
+
+ while ( bh != NULL )
+ {
+ pending_req = bh->pending_req;
+
+ unlock_buffer(pending_req->domain,
+ virt_to_phys(bh->b_data),
+ bh->b_size,
+ (pending_req->operation==READ));
+
+ /* Last bh of the request? Then post the overall response. */
+ if ( atomic_dec_and_test(&pending_req->pendcnt) )
+ {
+ make_response(pending_req->domain, pending_req->id,
+ pending_req->operation, pending_req->status);
+ put_task_struct(pending_req->domain);
+ /* Recycle the pending_req slot onto the free ring. */
+ spin_lock(&pend_prod_lock);
+ pending_ring[pending_prod] = pending_req - pending_reqs;
+ PENDREQ_IDX_INC(pending_prod);
+ spin_unlock(&pend_prod_lock);
+ atomic_dec(&nr_pending);
+ maybe_trigger_io_schedule();
+ }
+
+ nbh = bh->b_reqnext;
+ kmem_cache_free(buffer_head_cachep, bh);
+ bh = nbh;
+ }
+}
+
+/*
+ * b_end_io callback -- may be invoked in hardirq context. Queue the
+ * finished bh on this CPU's completion list; the real response work
+ * (and the freeing of the bh) happens in end_block_io_op_softirq().
+ */
static void end_block_io_op(struct buffer_head *bh, int uptodate)
{
unsigned long flags;
- pending_req_t *pending_req = bh->pending_req;
+ unsigned int cpu = smp_processor_id();
/* An error fails the entire request. */
if ( !uptodate )
{
DPRINTK("Buffer not up-to-date at end of operation\n");
- pending_req->status = 2;
+ bh->pending_req->status = 2;
}
- unlock_buffer(pending_req->domain,
- virt_to_phys(bh->b_data),
- bh->b_size,
- (pending_req->operation==READ));
-
- if ( atomic_dec_and_test(&pending_req->pendcnt) )
- {
- make_response(pending_req->domain, pending_req->id,
- pending_req->operation, pending_req->status);
- put_task_struct(pending_req->domain);
- spin_lock_irqsave(&pend_prod_lock, flags);
- pending_ring[pending_prod] = pending_req - pending_reqs;
- PENDREQ_IDX_INC(pending_prod);
- spin_unlock_irqrestore(&pend_prod_lock, flags);
- atomic_dec(&nr_pending);
- maybe_trigger_io_schedule();
- }
+ /* irqs disabled while linking onto the per-CPU list: the softirq
+  * handler detaches the whole list under the same protection. */
+ local_irq_save(flags);
+ bh->b_reqnext = completed_bhs[cpu];
+ completed_bhs[cpu] = bh;
+ local_irq_restore(flags);
- kmem_cache_free(buffer_head_cachep, bh);
+ /* NB. the bh is now freed by the softirq handler, not here. */
+ __cpu_raise_softirq(cpu, BLKDEV_RESPONSE_SOFTIRQ);
}
+
+
/* ----[ Syscall Interface ]------------------------------------------------*/
long do_block_io_op(block_io_op_t *u_block_io_op)
unsigned short size,
int writeable_buffer)
{
- unsigned long pfn, flags;
+ unsigned long pfn;
struct pfn_info *page;
- spin_lock_irqsave(&p->page_lock, flags);
+ spin_lock(&p->page_lock);
for ( pfn = buffer >> PAGE_SHIFT;
pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
pfn++ )
put_page_type(page);
put_page_tot(page);
}
- spin_unlock_irqrestore(&p->page_lock, flags);
+ spin_unlock(&p->page_lock);
}
static int do_block_io_op_domain(struct task_struct *p, int max_to_do)
struct buffer_head *bh;
int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ;
unsigned short nr_sects;
- unsigned long buffer, flags;
+ unsigned long buffer;
int i, tot_sects;
pending_req_t *pending_req;
int new_segs, nr_psegs = 0;
phys_seg_t phys_seg[MAX_BLK_SEGS * 2];
- spin_lock_irqsave(&p->page_lock, flags);
+ spin_lock(&p->page_lock);
/* Check that number of segments is sane. */
if ( (req->nr_segments == 0) || (req->nr_segments > MAX_BLK_SEGS) )
for ( i = 0; i < nr_psegs; i++ )
__lock_buffer(phys_seg[i].buffer, phys_seg[i].nr_sects<<9,
(operation==READ));
- spin_unlock_irqrestore(&p->page_lock, flags);
+ spin_unlock(&p->page_lock);
atomic_inc(&nr_pending);
pending_req = pending_reqs + pending_ring[pending_cons];
return;
bad_descriptor:
- spin_unlock_irqrestore(&p->page_lock, flags);
+ spin_unlock(&p->page_lock);
make_response(p, req->id, req->operation, 1);
}
static void make_response(struct task_struct *p, unsigned long id,
unsigned short op, unsigned long st)
{
- unsigned long cpu_mask, flags;
+ unsigned long cpu_mask;
int position;
blk_ring_t *blk_ring;
/* Place on the response ring for the relevant domain. */
- spin_lock_irqsave(&p->blk_ring_lock, flags);
+ spin_lock(&p->blk_ring_lock);
blk_ring = p->blk_ring_base;
position = p->blk_resp_prod;
blk_ring->ring[position].resp.id = id;
blk_ring->ring[position].resp.operation = op;
blk_ring->ring[position].resp.status = st;
p->blk_resp_prod = blk_ring->resp_prod = BLK_RING_INC(position);
- spin_unlock_irqrestore(&p->blk_ring_lock, flags);
+ spin_unlock(&p->blk_ring_lock);
/* Kick the relevant domain. */
cpu_mask = mark_guest_event(p, _EVENT_BLKDEV);
atomic_set(&nr_pending, 0);
pending_prod = pending_cons = 0;
memset(pending_reqs, 0, sizeof(pending_reqs));
- for ( i = 0; i < MAX_PENDING_REQS; i++ ) pending_ring[i] = i;
+ for ( i = 0; i < MAX_PENDING_REQS; i++ )
+ pending_ring[i] = i;
+
+ for ( i = 0; i < NR_CPUS; i++ )
+ completed_bhs[i] = NULL;
+
+ open_softirq(BLKDEV_RESPONSE_SOFTIRQ, end_block_io_op_softirq, NULL);
spin_lock_init(&io_schedule_list_lock);
INIT_LIST_HEAD(&io_schedule_list);
};
-/* Who gets which entry in bh_base. Things which will occur most often
- should come first */
-
enum {
TIMER_BH = 0,
- TQUEUE_BH,
- SCSI_BH,
- IMMEDIATE_BH
+ SCSI_BH
};
#include <asm/hardirq.h>
#include <asm/softirq.h>
-
-/* PLEASE, avoid to allocate new softirqs, if you need not _really_ high
- frequency threaded job scheduling. For almost all the purposes
- tasklets are more than enough. F.e. all serial device BHs et
- al. should be converted to tasklets, not to softirqs.
- */
-
enum
{
HI_SOFTIRQ=0,
NET_RX_SOFTIRQ,
AC_TIMER_SOFTIRQ,
- TASKLET_SOFTIRQ
+ TASKLET_SOFTIRQ,
+ BLKDEV_RESPONSE_SOFTIRQ, /* deferred block-device completion responses */
+ NET_TX_SOFTIRQ /* deferred freeing of tx skbs queued from irq context */
};
/* softirq mask and active fields moved to irq_cpustat_t in
struct vlan_group;
+extern struct skb_completion_queues {
+ struct sk_buff *rx; /* Packets received in interrupt context. */
+ unsigned int rx_qlen;
+ struct sk_buff *tx; /* Tx buffers defunct in interrupt context. */
+} skb_queue[NR_CPUS] __cacheline_aligned;
+
/* Backlog congestion levels */
#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */
#define NET_RX_DROP 1 /* packet dropped */
}
-/*
- * Xen does not need deferred skb freeing, as all destructor hook functions
- * are IRQ safe. Linux needed more care for some destructors...
+/* Use this variant when it is known for sure that it
+ * is executing from interrupt context.
+ */
+static inline void dev_kfree_skb_irq(struct sk_buff *skb)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ local_irq_save(flags);
+ /* Defer the free: chain onto this CPU's defunct-tx list and let the
+  * NET_TX_SOFTIRQ handler perform the actual free. */
+ skb->next = skb_queue[cpu].tx;
+ skb_queue[cpu].tx = skb;
+ __cpu_raise_softirq(cpu, NET_TX_SOFTIRQ);
+ local_irq_restore(flags);
+}
+
+/* Use this variant in places where it could be invoked
+ * either from interrupt or non-interrupt context.
*/
-#define dev_kfree_skb_irq(_skb) dev_kfree_skb(_skb)
-#define dev_kfree_skb_any(_skb) dev_kfree_skb(_skb)
+static inline void dev_kfree_skb_any(struct sk_buff *skb)
+{
+ /* NB(review): in_irq() detects hardirq context only; softirq callers
+  * take the plain dev_kfree_skb() path, presumed safe there -- confirm. */
+ if (in_irq())
+ dev_kfree_skb_irq(skb);
+ else
+ dev_kfree_skb(skb);
+}
extern void net_call_rx_atomic(void (*fn)(void));
extern int netif_rx(struct sk_buff *skb);
#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
-static struct sk_buff_head rx_skb_queue[NR_CPUS] __cacheline_aligned;
+struct skb_completion_queues skb_queue[NR_CPUS] __cacheline_aligned;
static int get_tx_bufs(net_vif_t *vif);
int netif_rx(struct sk_buff *skb)
{
- int this_cpu = smp_processor_id();
- struct sk_buff_head *q = &rx_skb_queue[this_cpu];
+ int cpu = smp_processor_id();
unsigned long flags;
- /* This oughtn't to happen, really! */
- if ( unlikely(skb_queue_len(q) > 100) )
+ local_irq_save(flags);
+
+ if ( unlikely(skb_queue[cpu].rx_qlen > 100) )
{
+ local_irq_restore(flags);
perfc_incr(net_rx_congestion_drop);
return NET_RX_DROP;
}
- local_irq_save(flags);
- __skb_queue_tail(q, skb);
+ skb->next = skb_queue[cpu].rx;
+ skb_queue[cpu].rx = skb;
+
local_irq_restore(flags);
- __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
+ __cpu_raise_softirq(cpu, NET_RX_SOFTIRQ);
return NET_RX_SUCCESS;
}
+/*
+ * NET_RX_SOFTIRQ handler: detach this CPU's rx list and process each
+ * packet with interrupts enabled.
+ */
static void net_rx_action(struct softirq_action *h)
{
- int offset, this_cpu = smp_processor_id();
- struct sk_buff_head *q = &rx_skb_queue[this_cpu];
- struct sk_buff *skb;
+ int offset, cpu = smp_processor_id();
+ struct sk_buff *skb, *nskb;
local_irq_disable();
-
- while ( (skb = __skb_dequeue(q)) != NULL )
+ /* Grab the whole list and reset the queue-length accounting; irqs are
+  * off so netif_rx() cannot interleave with us on this CPU. */
+ skb = skb_queue[cpu].rx;
+ skb_queue[cpu].rx = NULL;
+ skb_queue[cpu].rx_qlen = 0;
+ local_irq_enable();
+
+ while ( skb != NULL )
{
ASSERT(skb->skb_type == SKB_ZERO_COPY);
skb_push(skb, ETH_HLEN);
skb->mac.raw = skb->data;
- netdev_rx_stat[this_cpu].total++;
+ netdev_rx_stat[cpu].total++;
if ( skb->dst_vif == NULL )
skb->dst_vif = net_get_target_vif(
}
unmap_domain_mem(skb->head);
+
+ /* Save the link before kfree_skb() destroys the skb. */
+ nskb = skb->next;
kfree_skb(skb);
+ skb = nskb;
}
-
- local_irq_enable();
}
}
+/*
+ * NET_TX_SOFTIRQ handler: free the tx skbs that dev_kfree_skb_irq()
+ * queued from interrupt context on this CPU.
+ */
+static void net_tx_gc(struct softirq_action *h)
+{
+ int cpu = smp_processor_id();
+ struct sk_buff *skb, *nskb;
+
+ /* Detach the list atomically w.r.t. the hardirq enqueuer. */
+ local_irq_disable();
+ skb = skb_queue[cpu].tx;
+ skb_queue[cpu].tx = NULL;
+ local_irq_enable();
+
+ while ( skb != NULL )
+ {
+ nskb = skb->next;
+ __kfree_skb(skb);
+ skb = nskb;
+ }
+}
+
/* Destructor function for tx skbs. */
static void tx_skb_release(struct sk_buff *skb)
{
int i;
- net_vif_t *vif = skb->src_vif;
- unsigned long flags;
+ net_vif_t *vif;
+
+ vif = skb->src_vif;
+ /* NB(review): plain spin_lock is safe only because skb destruction is
+  * now deferred out of hardirq context via NET_TX_SOFTIRQ -- confirm. */
- spin_lock_irqsave(&vif->domain->page_lock, flags);
+ spin_lock(&vif->domain->page_lock);
for ( i = 0; i < skb_shinfo(skb)->nr_frags; i++ )
put_page_tot(skb_shinfo(skb)->frags[i].page);
- spin_unlock_irqrestore(&vif->domain->page_lock, flags);
-
+ spin_unlock(&vif->domain->page_lock);
+
if ( skb->skb_type == SKB_NODATA )
kmem_cache_free(net_header_cachep, skb->head);
-
+
skb_shinfo(skb)->nr_frags = 0;
-
- spin_lock_irqsave(&vif->tx_lock, flags);
+
+ spin_lock(&vif->tx_lock);
__make_tx_response(vif, skb->guest_id, RING_STATUS_OK);
- spin_unlock_irqrestore(&vif->tx_lock, flags);
-
+ spin_unlock(&vif->tx_lock);
+
/*
- * Checks below must happen after the above response is posted.
- * This avoids a possible race with a guest OS on another CPU.
+ * Checks below must happen after the above response is posted. This avoids
+ * a possible race with a guest OS on another CPU.
*/
smp_mb();
-
+
if ( (vif->tx_cons == vif->tx_prod) && get_tx_bufs(vif) )
{
add_to_net_schedule_list_tail(vif);
maybe_schedule_tx_action();
}
-
+
put_vif(vif);
}
struct sk_buff *skb;
tx_req_entry_t tx;
int i, j, ret = 0;
- unsigned long flags;
if ( vif->tx_req_cons == shared_idxs->tx_req_prod )
return 0;
- spin_lock_irqsave(&vif->tx_lock, flags);
+ spin_lock(&vif->tx_lock);
/* Currently waiting for more credit? */
if ( vif->remaining_credit == 0 )
vif->tx_prod = j;
out:
- spin_unlock_irqrestore(&vif->tx_lock, flags);
+ spin_unlock(&vif->tx_lock);
return ret;
}
pte_pfn = rx.addr >> PAGE_SHIFT;
pte_page = frame_table + pte_pfn;
- spin_lock_irq(&p->page_lock);
+ spin_lock(&p->page_lock);
if ( (pte_pfn >= max_page) ||
((pte_page->flags & (PG_type_mask | PG_domain_mask)) !=
(PGT_l1_page_table | p->domain)) )
{
DPRINTK("Bad page frame for ppte %d,%08lx,%08lx,%08lx\n",
p->domain, pte_pfn, max_page, pte_page->flags);
- spin_unlock_irq(&p->page_lock);
+ spin_unlock(&p->page_lock);
make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
continue;
}
rx_unmap_and_continue:
unmap_domain_mem(ptep);
- spin_unlock_irq(&p->page_lock);
+ spin_unlock(&p->page_lock);
}
vif->rx_req_cons = i;
+/*
+ * Return all outstanding rx buffers to the guest and drop all pending
+ * tx requests for this vif. Always returns 0.
+ */
long flush_bufs_for_vif(net_vif_t *vif)
{
int i;
- unsigned long *pte, flags;
+ unsigned long *pte;
struct pfn_info *page;
struct task_struct *p = vif->domain;
rx_shadow_entry_t *rx;
net_idx_t *shared_idxs = vif->shared_idxs;
/* Return any outstanding receive buffers to the guest OS. */
+ /* NB(review): plain spin_lock assumes this never runs in irq context
+  * -- confirm against callers. */
- spin_lock_irqsave(&p->page_lock, flags);
+ spin_lock(&p->page_lock);
for ( i = vif->rx_req_cons;
(i != shared_idxs->rx_req_prod) &&
(((vif->rx_resp_prod-i) & (RX_RING_SIZE-1)) != 1);
make_rx_response(vif, rx->id, 0, RING_STATUS_DROPPED, 0);
}
vif->rx_cons = i;
- spin_unlock_irqrestore(&p->page_lock, flags);
+ spin_unlock(&p->page_lock);
/*
* Flush pending transmit buffers. The guest may still have to wait for
* buffers that are queued at a physical NIC.
*/
- spin_lock_irqsave(&vif->tx_lock, flags);
+ spin_lock(&vif->tx_lock);
for ( i = vif->tx_req_cons;
(i != shared_idxs->tx_req_prod) &&
(((vif->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1);
RING_STATUS_DROPPED);
}
vif->tx_req_cons = i;
- spin_unlock_irqrestore(&vif->tx_lock, flags);
+ spin_unlock(&vif->tx_lock);
return 0;
}
case NETOP_RESET_RINGS:
/* We take the tx_lock to avoid a race with get_tx_bufs. */
- spin_lock_irq(&vif->tx_lock);
+ spin_lock(&vif->tx_lock);
if ( (vif->rx_req_cons != vif->rx_resp_prod) ||
(vif->tx_req_cons != vif->tx_resp_prod) )
{
vif->tx_req_cons = vif->tx_resp_prod = 0;
ret = 0;
}
- spin_unlock_irq(&vif->tx_lock);
+ spin_unlock(&vif->tx_lock);
break;
case NETOP_GET_VIF_INFO:
unsigned char st,
unsigned char off)
{
- unsigned long flags;
unsigned int pos;
rx_resp_entry_t *resp;
/* Place on the response ring for the relevant domain. */
- spin_lock_irqsave(&vif->rx_lock, flags);
+ spin_lock(&vif->rx_lock);
pos = vif->rx_resp_prod;
resp = &vif->shared_rings->rx_ring[pos].resp;
resp->id = id;
unsigned long cpu_mask = mark_guest_event(vif->domain, _EVENT_NET);
guest_event_notify(cpu_mask);
}
- spin_unlock_irqrestore(&vif->rx_lock, flags);
+ spin_unlock(&vif->rx_lock);
}
int setup_network_devices(void)
{
- int i, ret;
+ int ret;
extern char opt_ifname[];
- for ( i = 0; i < smp_num_cpus; i++ )
- skb_queue_head_init(&rx_skb_queue[i]);
+ memset(skb_queue, 0, sizeof(skb_queue));
+ /* Actual receive processing happens in softirq context. */
open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
+
+ /* Processing of defunct transmit buffers happens in softirq context. */
+ open_softirq(NET_TX_SOFTIRQ, net_tx_gc, NULL);
+
+ /* Transmit scheduling happens in a tasklet to exclude other processors. */
tasklet_enable(&net_tx_tasklet);
if ( (the_dev = dev_get_by_name(opt_ifname)) == NULL )